Pull Data
# Load ILINet surveillance data, transform it per location, and save to disk.
# `download_ilinet_data()`, `transformed()`, and `file_names` are defined
# elsewhere in the project.
# Fix: the chain mixed `%>%` and `|>` (the old L13 used `|>`); unified on `%>%`.
df.ilinet <- download_ilinet_data() %>%
  mutate(location_name = region,
         ili = unweighted_ili,
         # Day 7 of the MMWR week is the week-ending date
         date = MMWRweek2Date(MMWRyear = year, MMWRweek = week, MMWRday = 7)) %>%
  dplyr::select(date, location_name, ili) %>%
  drop_na() %>%
  group_by(location_name) %>%
  # Per-location transform; +1 presumably guards a log-style transform at 0
  # — confirm against the definition of transformed()
  mutate(ili = transformed(ili + 1)) %>%
  ungroup() %>%
  drop_na() %>%
  # Virgin Islands is excluded throughout this analysis
  filter(!(location_name %in% c('Virgin Islands'))) %>%
  mutate(location_name = ifelse(location_name == 'National', 'US', location_name))
write_csv(df.ilinet, file = file_names$ili_output)
# Load hospitalization surveillance data, keep only overall-population rows,
# and put the weekly rate on the log(x + 1) scale.
df.hosp <- read_csv(file_names$hospitalization) %>%
  filter(`AGE CATEGORY` == 'Overall',
         `SEX CATEGORY` == 'Overall',
         `RACE CATEGORY` == 'Overall') %>%
  mutate(location_name = ifelse(CATCHMENT == 'Entire Network', 'US', CATCHMENT),
         # 'null' strings denote missing rates; treat them as zero
         `WEEKLY RATE` = as.numeric(ifelse(`WEEKLY RATE` == 'null', 0, `WEEKLY RATE`)),
         hosp = log(`WEEKLY RATE` + 1),
         date = MMWRweek2Date(MMWRyear = `MMWR-YEAR`, MMWRweek = `MMWR-WEEK`, MMWRday = 7)) %>%
  # Restrict to the pre-pandemic training window
  filter(date <= mdy('06/30/2019')) %>%
  dplyr::select(date, location_name, hosp) %>%
  filter(!(location_name %in% c('Virgin Islands')))
## Rows: 124800 Columns: 12
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): CATCHMENT, NETWORK, YEAR, AGE CATEGORY, SEX CATEGORY, RACE CATEGOR...
## dbl (2): MMWR-YEAR, MMWR-WEEK
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Merge Data
# Join the ILI series onto the hospitalization series by week and location,
# keeping rows present in either source.
df.combined <- df.hosp %>%
  full_join(df.ilinet, by = join_by(date, location_name)) %>%
  # Belt-and-braces: both inputs already drop Virgin Islands
  filter(location_name != 'Virgin Islands')
Plot Example
# Long format for ggplot: one row per (date, location, series name).
df.plot <- df.combined %>%
  drop_na() %>%
  pivot_longer(-c(date, location_name))

# Time series of both signals, one panel per location.
p1 <- ggplot(df.plot, aes(x = date, y = value, color = name)) +
  geom_line(alpha = 0.6) +
  facet_wrap(~location_name, ncol = 5, scales = 'free_y') +
  scale_color_manual(name = NULL, values = c(hosp = 'black', ili = 'red')) +
  theme_bw() +
  theme(legend.position = 'top',
        legend.justification = 'right',
        legend.text = element_text(size = 9),
        legend.box.spacing = unit(0, "pt"),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
show(p1)

# Scatter of transformed ILI against log hospitalizations, colored by location.
p2 <- df.plot %>%
  pivot_wider(names_from = name, values_from = value) %>%
  drop_na() %>%
  ggplot(aes(ili, hosp, color = location_name, fill = location_name)) +
  geom_point(alpha = 0.6) +
  xlab('Normalized Unweighted ILI percentage') +
  ylab('Log(Hospitalizations + 1)') +
  theme_bw(18) +
  scale_color_discrete(name = NULL) +
  scale_fill_discrete(name = NULL) +
  theme(legend.position = "top")
show(p2)

# Same ILI-vs-hospitalization scatter, but one panel per location.
p3 <- df.plot %>%
  pivot_wider(names_from = name, values_from = value) %>%
  drop_na() %>%
  ggplot(aes(ili, hosp)) +
  geom_point(alpha = 0.5) +
  facet_wrap(~location_name, ncol = 5) +
  xlab('Normalized Unweighted ILI percentage') +
  ylab('Log(Hospitalizations + 1)') +
  theme_bw()
show(p3)

#ggsave(plot = p3, 'model_scatter.png', width = 9, height = 3.5, dpi = 300)
#ggsave(plot = p3, 'model_scatter.pdf', width = 9, height = 3.5)
Make Regression Models
# Fit one hosp ~ ili linear model per location and collect coefficients
# plus fit statistics into a single table.
# Fixes: dplyr's `do()` is superseded — `group_modify()` is the supported
# replacement; `rename()` replaces the mutate()/select() pair.
df.models <- df.combined %>%
  drop_na() %>%
  rename(location = location_name) %>%
  group_by(location) %>%
  group_modify(~ {
    model <- lm(hosp ~ ili, data = .x)
    fit <- summary(model)
    # data.frame() munges the backtick names to p.value / R.2, matching
    # the rendered table headers.
    data.frame(intercept = round(coef(model)[1], digits = 2),
               slope = round(coef(model)[2], digits = 2),
               `p-value` = fit$coefficients[2, 4],
               `R^2` = round(fit$r.squared, digits = 2))
  })
# Render the per-location model summaries as a styled table and save a PDF
# copy. Fix: `F` -> `FALSE` (T/F are reassignable aliases, not keywords).
t1 <- df.models %>%
  kbl() %>%
  kable_paper('striped',
              full_width = FALSE,
              html_font = 'sans-serif')
save_kable(t1, file = "fit_summary.pdf")
## Note that HTML color may not be displayed on PDF properly.
## save_kable will have the best result with magick installed.
t1
| location    | intercept | slope | p.value | R.2  |
|-------------|-----------|-------|---------|------|
| California  | 0.38      | 0.59  | 0       | 0.41 |
| Colorado    | 0.70      | 0.61  | 0       | 0.53 |
| Connecticut | 0.75      | 0.71  | 0       | 0.59 |
| Georgia     | 0.43      | 0.46  | 0       | 0.60 |
| Maryland    | 0.67      | 0.54  | 0       | 0.43 |
| Minnesota   | 0.32      | 0.67  | 0       | 0.61 |
| New Mexico  | 0.51      | 0.63  | 0       | 0.54 |
| Oregon      | 0.45      | 0.59  | 0       | 0.57 |
| Tennessee   | 0.52      | 0.62  | 0       | 0.63 |
| US          | 0.16      | 0.83  | 0       | 0.77 |
Combined Model
# Pool all locations into a single regression of log-hospitalizations on ILI.
df.final <- df.plot %>%
  pivot_wider(names_from = name, values_from = value) %>%
  drop_na()
mod <- lm(hosp ~ ili, data = df.final)
summary(mod)
##
## Call:
## lm(formula = hosp ~ ili, data = df.final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29912 -0.33575 -0.03279 0.29163 1.75523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.509009 0.009457 53.82 <2e-16 ***
## ili 0.588127 0.010897 53.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4612 on 2738 degrees of freedom
## Multiple R-squared: 0.5155, Adjusted R-squared: 0.5153
## F-statistic: 2913 on 1 and 2738 DF, p-value: < 2.2e-16
Output Predicted Hospitalizations for Plotting
# Rebuild the transformed ILINet series as prediction input.
# NOTE(review): near-duplicate of the df.ilinet pipeline above (minus the
# National -> US rename) — consider extracting a shared helper.
tmp.ilinet <- download_ilinet_data() %>%
  mutate(location_name = region,
         ili = unweighted_ili,
         date = MMWRweek2Date(MMWRyear = year, MMWRweek = week, MMWRday = 7)) %>%
  dplyr::select(date, location_name, ili) %>%
  drop_na() %>%
  group_by(location_name) %>%
  mutate(ili = transformed(ili + 1)) %>%
  ungroup() %>%
  drop_na() %>%
  filter(!(location_name %in% c('Virgin Islands')))

# Back-transform the pooled model's predictions to the count scale and
# clamp negative values at zero.
tmp.predicted.hosp <- tmp.ilinet %>%
  bind_cols(pred_hosp = round(exp(predict(mod, tmp.ilinet)) - 1, digits = 2)) %>%
  mutate(pred_hosp = ifelse(pred_hosp < 0, 0, pred_hosp))
#write_csv(tmp.predicted.hosp, file = file_names$output_predicted)
Make Historical Time Series
# Forward-fill missing weeks within each location: pivot to one column per
# location, fill each column down with the last observed value, then pivot
# back to long format.
# NOTE(review): fill(everything()) nominally includes `date`; assumed to
# have no NAs there after the pivot — confirm if dates can ever be missing.
df.ilinet <- df.ilinet %>%
pivot_wider(names_from = location_name, values_from = ili) %>%
fill(everything(), .direction = "down") %>%
pivot_longer(cols = -date,
names_to = "location_name",
values_to = "ili") %>%
arrange(location_name, date)
# Back-transform the pooled model's predictions to the count scale and
# clamp negative values at zero.
predicted.hosp <- bind_cols(df.ilinet, pred_hosp = round(exp(predict(mod, df.ilinet)) - 1, digits = 2)) %>%
mutate(pred_hosp = ifelse(pred_hosp < 0, 0, pred_hosp))
# Historical time series of predicted hospitalizations, one panel per
# location. Fix: facet_wrap()'s argument is `scales`, not `scale` — the
# original relied on partial argument matching.
p4 <- ggplot(predicted.hosp, aes(date, pred_hosp)) +
  geom_line() +
  facet_wrap(~location_name, ncol = 3, scales = 'free_y') +
  theme_bw()
show(p4)
## Warning: Removed 679 rows containing missing values or values outside the scale
## range (`geom_line()`).

Pull fractional influenza in ED visits
## Fetching batch: 1 Offset: 0
## Rows retrieved: 3300
## Fetching batch: 2 Offset: 3300
## Rows retrieved: 3300
## Fetching batch: 3 Offset: 6600
## Rows retrieved: 3300
## Fetching batch: 4 Offset: 9900
## Rows retrieved: 3300
## Fetching batch: 5 Offset: 13200
## Rows retrieved: 3300
## Fetching batch: 6 Offset: 16500
## Rows retrieved: 3300
## Fetching batch: 7 Offset: 19800
## Rows retrieved: 3300
## Fetching batch: 8 Offset: 23100
## Rows retrieved: 3300
## Fetching batch: 9 Offset: 26400
## Rows retrieved: 3300
## Fetching batch: 10 Offset: 29700
## Rows retrieved: 3300
## Fetching batch: 11 Offset: 33000
## Rows retrieved: 3300
## Fetching batch: 12 Offset: 36300
## Rows retrieved: 3300
## Fetching batch: 13 Offset: 39600
## Rows retrieved: 3300
## Fetching batch: 14 Offset: 42900
## Rows retrieved: 3300
## Fetching batch: 15 Offset: 46200
## Rows retrieved: 3300
## Fetching batch: 16 Offset: 49500
## Rows retrieved: 3300
## Fetching batch: 17 Offset: 52800
## Rows retrieved: 3300
## Fetching batch: 18 Offset: 56100
## Rows retrieved: 3300
## Fetching batch: 19 Offset: 59400
## Rows retrieved: 3300
## Fetching batch: 20 Offset: 62700
## Rows retrieved: 3300
## Fetching batch: 21 Offset: 66000
## Rows retrieved: 3300
## Fetching batch: 22 Offset: 69300
## Rows retrieved: 3300
## Fetching batch: 23 Offset: 72600
## Rows retrieved: 3300
## Fetching batch: 24 Offset: 75900
## Rows retrieved: 3300
## Fetching batch: 25 Offset: 79200
## Rows retrieved: 3300
## Fetching batch: 26 Offset: 82500
## Rows retrieved: 3300
## Fetching batch: 27 Offset: 85800
## Rows retrieved: 3300
## Fetching batch: 28 Offset: 89100
## Rows retrieved: 3300
## Fetching batch: 29 Offset: 92400
## Rows retrieved: 3300
## Fetching batch: 30 Offset: 95700
## Rows retrieved: 3300
## Fetching batch: 31 Offset: 99000
## Rows retrieved: 3300
## Fetching batch: 32 Offset: 102300
## Rows retrieved: 3300
## Fetching batch: 33 Offset: 105600
## Rows retrieved: 3300
## Fetching batch: 34 Offset: 108900
## Rows retrieved: 3300
## Fetching batch: 35 Offset: 112200
## Rows retrieved: 3300
## Fetching batch: 36 Offset: 115500
## Rows retrieved: 3300
## Fetching batch: 37 Offset: 118800
## Rows retrieved: 3300
## Fetching batch: 38 Offset: 122100
## Rows retrieved: 3300
## Fetching batch: 39 Offset: 125400
## Rows retrieved: 3300
## Fetching batch: 40 Offset: 128700
## Rows retrieved: 3300
## Fetching batch: 41 Offset: 132000
## Rows retrieved: 3300
## Fetching batch: 42 Offset: 135300
## Rows retrieved: 3300
## Fetching batch: 43 Offset: 138600
## Rows retrieved: 3300
## Fetching batch: 44 Offset: 141900
## Rows retrieved: 3300
## Fetching batch: 45 Offset: 145200
## Rows retrieved: 3300
## Fetching batch: 46 Offset: 148500
## Rows retrieved: 3300
## Fetching batch: 47 Offset: 151800
## Rows retrieved: 3300
## Fetching batch: 48 Offset: 155100
## Rows retrieved: 3300
## Fetching batch: 49 Offset: 158400
## Rows retrieved: 3300
## Fetching batch: 50 Offset: 161700
## Rows retrieved: 3300
## Fetching batch: 51 Offset: 165000
## Rows retrieved: 3300
## Fetching batch: 52 Offset: 168300
## Rows retrieved: 3300
## Fetching batch: 53 Offset: 171600
## Rows retrieved: 3300
## Fetching batch: 54 Offset: 174900
## Rows retrieved: 3300
## Fetching batch: 55 Offset: 178200
## Rows retrieved: 3300
## Fetching batch: 56 Offset: 181500
## Rows retrieved: 3300
## Fetching batch: 57 Offset: 184800
## Rows retrieved: 3300
## Fetching batch: 58 Offset: 188100
## Rows retrieved: 3300
## Fetching batch: 59 Offset: 191400
## Rows retrieved: 3300
## Fetching batch: 60 Offset: 194700
## Rows retrieved: 3300
## Fetching batch: 61 Offset: 198000
## Rows retrieved: 3300
## Fetching batch: 62 Offset: 201300
## Rows retrieved: 3300
## Fetching batch: 63 Offset: 204600
## Rows retrieved: 3300
## Fetching batch: 64 Offset: 207900
## Rows retrieved: 3300
## Fetching batch: 65 Offset: 211200
## Rows retrieved: 3300
## Fetching batch: 66 Offset: 214500
## Rows retrieved: 3300
## Fetching batch: 67 Offset: 217800
## Rows retrieved: 3300
## Fetching batch: 68 Offset: 221100
## Rows retrieved: 3300
## Fetching batch: 69 Offset: 224400
## Rows retrieved: 3300
## Fetching batch: 70 Offset: 227700
## Rows retrieved: 3300
## Fetching batch: 71 Offset: 231000
## Rows retrieved: 3300
## Fetching batch: 72 Offset: 234300
## Rows retrieved: 3300
## Fetching batch: 73 Offset: 237600
## Rows retrieved: 3300
## Fetching batch: 74 Offset: 240900
## Rows retrieved: 3300
## Fetching batch: 75 Offset: 244200
## Rows retrieved: 3300
## Fetching batch: 76 Offset: 247500
## Rows retrieved: 3300
## Fetching batch: 77 Offset: 250800
## Rows retrieved: 3300
## Fetching batch: 78 Offset: 254100
## Rows retrieved: 3300
## Fetching batch: 79 Offset: 257400
## Rows retrieved: 3300
## Fetching batch: 80 Offset: 260700
## Rows retrieved: 3300
## Fetching batch: 81 Offset: 264000
## Rows retrieved: 3300
## Fetching batch: 82 Offset: 267300
## Rows retrieved: 3300
## Fetching batch: 83 Offset: 270600
## Rows retrieved: 3300
## Fetching batch: 84 Offset: 273900
## Rows retrieved: 3300
## Fetching batch: 85 Offset: 277200
## Rows retrieved: 3300
## Fetching batch: 86 Offset: 280500
## Rows retrieved: 3300
## Fetching batch: 87 Offset: 283800
## Rows retrieved: 3300
## Fetching batch: 88 Offset: 287100
## Rows retrieved: 3300
## Fetching batch: 89 Offset: 290400
## Rows retrieved: 3300
## Fetching batch: 90 Offset: 293700
## Rows retrieved: 3300
## Fetching batch: 91 Offset: 297000
## Rows retrieved: 3300
## Fetching batch: 92 Offset: 300300
## Rows retrieved: 3300
## Fetching batch: 93 Offset: 303600
## Rows retrieved: 3300
## Fetching batch: 94 Offset: 306900
## Rows retrieved: 3300
## Fetching batch: 95 Offset: 310200
## Rows retrieved: 3300
## Fetching batch: 96 Offset: 313500
## Rows retrieved: 3300
## Fetching batch: 97 Offset: 316800
## Rows retrieved: 3300
## Fetching batch: 98 Offset: 320100
## Rows retrieved: 3300
## Fetching batch: 99 Offset: 323400
## Rows retrieved: 3300
## Fetching batch: 100 Offset: 326700
## Rows retrieved: 3300
## Fetching batch: 101 Offset: 330000
## Rows retrieved: 3300
## Fetching batch: 102 Offset: 333300
## Rows retrieved: 3300
## Fetching batch: 103 Offset: 336600
## Rows retrieved: 3300
## Fetching batch: 104 Offset: 339900
## Rows retrieved: 3300
## Fetching batch: 105 Offset: 343200
## Rows retrieved: 3300
## Fetching batch: 106 Offset: 346500
## Rows retrieved: 3300
## Fetching batch: 107 Offset: 349800
## Rows retrieved: 3300
## Fetching batch: 108 Offset: 353100
## Rows retrieved: 3300
## Fetching batch: 109 Offset: 356400
## Rows retrieved: 3300
## Fetching batch: 110 Offset: 359700
## Rows retrieved: 3300
## Fetching batch: 111 Offset: 363000
## Rows retrieved: 3300
## Fetching batch: 112 Offset: 366300
## Rows retrieved: 3300
## Fetching batch: 113 Offset: 369600
## Rows retrieved: 3300
## Fetching batch: 114 Offset: 372900
## Rows retrieved: 3300
## Fetching batch: 115 Offset: 376200
## Rows retrieved: 3300
## Fetching batch: 116 Offset: 379500
## Rows retrieved: 3300
## Fetching batch: 117 Offset: 382800
## Rows retrieved: 3300
## Fetching batch: 118 Offset: 386100
## Rows retrieved: 3300
## Fetching batch: 119 Offset: 389400
## Rows retrieved: 3300
## Fetching batch: 120 Offset: 392700
## Rows retrieved: 3300
## Fetching batch: 121 Offset: 396000
## Rows retrieved: 3300
## Fetching batch: 122 Offset: 399300
## Rows retrieved: 3300
## Fetching batch: 123 Offset: 402600
## Rows retrieved: 3300
## Fetching batch: 124 Offset: 405900
## Rows retrieved: 3300
## Fetching batch: 125 Offset: 409200
## Rows retrieved: 3300
## Fetching batch: 126 Offset: 412500
## Rows retrieved: 3300
## Fetching batch: 127 Offset: 415800
## Rows retrieved: 3300
## Fetching batch: 128 Offset: 419100
## Rows retrieved: 3300
## Fetching batch: 129 Offset: 422400
## Rows retrieved: 3300
## Fetching batch: 130 Offset: 425700
## Rows retrieved: 3300
## Fetching batch: 131 Offset: 429000
## Rows retrieved: 3300
## Fetching batch: 132 Offset: 432300
## Rows retrieved: 3300
## Fetching batch: 133 Offset: 435600
## Rows retrieved: 3300
## Fetching batch: 134 Offset: 438900
## Rows retrieved: 3300
## Fetching batch: 135 Offset: 442200
## Rows retrieved: 3300
## Fetching batch: 136 Offset: 445500
## Rows retrieved: 3300
## Fetching batch: 137 Offset: 448800
## Rows retrieved: 3300
## Fetching batch: 138 Offset: 452100
## Rows retrieved: 3300
## Fetching batch: 139 Offset: 455400
## Rows retrieved: 3300
## Fetching batch: 140 Offset: 458700
## Rows retrieved: 3300
## Fetching batch: 141 Offset: 462000
## Rows retrieved: 3300
## Fetching batch: 142 Offset: 465300
## Rows retrieved: 3300
## Fetching batch: 143 Offset: 468600
## Rows retrieved: 3300
## Fetching batch: 144 Offset: 471900
## Rows retrieved: 1958
## Total batches: 144 Total rows: 473858
## [1] "Total rows retrieved: 473858"
Pull new NHSN data
# Fetch weekly confirmed-influenza new admissions from the CDC NHSN dataset
# (data.cdc.gov resource mpgq-jmmr) and return one row per jurisdiction-week
# with the admission count and the per-100k weekly rate.
#
# Depends on globals defined elsewhere in the project:
#   cdc.locations.file — abbreviation -> location lookup table
#   population         — location_name -> population table
# Returns a data frame with: date, location, location_name, value, weekly_rate.
# Fixes: `<-` instead of `=` for assignment; redundant return() dropped.
fetch_flu <- function() {
  # Read from data.cdc.gov, keeping only weeks after flu reporting
  # became mandatory (May 2024)
  health_data <- RSocrata::read.socrata(url = "https://data.cdc.gov/resource/mpgq-jmmr.json") %>%
    dplyr::filter(weekendingdate >= as.Date("2024-05-01"))
  # Remove VI/AS/GU/MP (not included for FluSight); keep and rename only
  # the variables used downstream
  recent_data <- health_data %>%
    dplyr::filter(!jurisdiction %in% c("VI", "AS", "GU", "MP")) %>%
    dplyr::select(jurisdiction, weekendingdate, totalconfflunewadm) %>%
    dplyr::rename("value" = "totalconfflunewadm", "date" = "weekendingdate", "state" = "jurisdiction") %>%
    dplyr::mutate(date = as.Date(date),
                  value = as.numeric(value),
                  state = str_replace(state, "USA", "US"))
  # Attach location metadata by state abbreviation
  full_data <- dplyr::left_join(recent_data, cdc.locations.file, by = join_by("state" == "abbreviation"))
  # Drop rows with no location match, then linearly interpolate missing
  # values within each location (na.rm = FALSE: no end extrapolation)
  full_data <- full_data %>%
    dplyr::filter(!is.na(location_name)) %>%
    dplyr::arrange(location_name, date) %>%
    dplyr::group_by(location_name) %>%
    dplyr::mutate(value = zoo::na.approx(value, x = as.numeric(date), na.rm = FALSE)) %>%
    dplyr::ungroup()
  # Convert counts to a weekly rate per 100,000 population
  final_dat <- full_data %>%
    left_join(population, by = c("location_name")) %>%
    dplyr::mutate(weekly_rate = (value * 100000) / population) %>%
    select(date, location, location_name, value, weekly_rate)
  final_dat
}
# Most recent NHSN flu admissions, trimmed to the three columns used when
# stitching the series together below (select() renames value in place).
most_recent_flu <- fetch_flu() |>
  select(date, location_name, total_hosp = value)
## Warning in RSocrata::read.socrata(url =
## "https://data.cdc.gov/resource/mpgq-jmmr.json"): Dates and currency fields will
## be converted to character
Compare To Current Data
# Spline-interpolate internal NAs in `y` (indexed by `x`) without
# extrapolating beyond the first/last observed values.
#
# @param y Numeric vector, possibly containing NAs.
# @param x Numeric index for the observations (e.g. as.numeric(date)).
# @return `y` with internal NAs spline-filled; leading/trailing NAs kept.
# Fixes: zoo:: namespacing (consistent with fetch_flu); the mask variable
# marks edge NAs, not internal ones — renamed and re-commented accordingly.
interpolate_internal <- function(y, x) {
  # na.spline() fills every NA, including extrapolated ends...
  spline_values <- zoo::na.spline(y, x = x)
  # ...while na.approx(na.rm = FALSE) leaves leading/trailing NAs in place,
  # so its remaining NAs mark positions that would require extrapolation
  edge_nas <- is.na(zoo::na.approx(y, x = x, na.rm = FALSE))
  # Undo the spline's extrapolation at the edges
  spline_values[edge_nas] <- NA
  spline_values
}
# Observed ("true") hospitalizations for the 2021-22 season.
df.old.true.hosp <- read_csv(file_names$old_true_hosp_1) %>%
  filter(date >= mdy('07/01/2021'),
         date < mdy('06/01/2022')) %>%
  mutate(true_hosp = value) %>%
  select(-value, -location)
## Rows: 10010 Columns: 4
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): location, location_name
## dbl (1): value
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Observed hospitalizations from June 2022 onward, combined with the
# 2021-22 history. Fix: the join keys are made explicit (they match the
# keys dplyr previously inferred: date, location_name, true_hosp),
# silencing the join_by message and guarding against schema drift.
df.true.hosp <- read_csv(file_names$old_true_hosp_2) %>%
  filter(date >= mdy('06/01/2022')) %>%
  mutate(true_hosp = value) %>%
  # '...1' is the unnamed row-index column read_csv auto-names
  select(-'...1', -value, -location, -weekly_rate) %>%
  full_join(df.old.true.hosp, by = c('date', 'location_name', 'true_hosp'))
## New names:
## Rows: 6148 Columns: 6
## ── Column specification
## ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── Delimiter: "," chr
## (2): location, location_name dbl (3): ...1, value, weekly_rate date (1): date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Joining with `by = join_by(date, location_name, true_hosp)`
## • `` -> `...1`
# Stitch the full hospitalization series from three sources:
# (1) pre-2019 model predictions shifted forward in time,
# (2) observed ("true") hospitalizations, and
# (3) recent NHSN admissions (most_recent_flu).
df.combined.hosp <- predicted.hosp %>%
# Keep only the historical (pre-July-2019) predictions...
filter(date <= mdy('06/30/2019')) %>%
# ...shifted forward 728 days (exactly 104 weeks), presumably to align
# season timing with the observed era — TODO confirm the offset
mutate(date = date + days(728)) %>%
full_join(df.true.hosp, by = c('date', 'location_name')) %>%
# Append predictions beyond the last observed date
full_join(predicted.hosp %>% filter(date > max(df.true.hosp$date))) %>%
# df.ed is built in the ED-visit section (code not shown here); assumed to
# supply scale_factor per (date, location_name) — verify upstream
full_join(df.ed, by = c('date', 'location_name')) %>%
# Missing scale factor means no adjustment
mutate(scale_factor = ifelse(is.na(scale_factor), 1, scale_factor)) %>%
inner_join(population, by = 'location_name') %>%
# Prefer rate-based predictions converted to counts; fall back to truth
mutate(prelim_hosp = round(coalesce(pred_hosp * population / 100000, true_hosp))) %>%
mutate(total_hosp = round(prelim_hosp * scale_factor)) %>%
#mutate(total_hosp = round(prelim_hosp)) |>
# From May 2024 onward, use the NHSN series instead
filter(date < mdy('05/01/2024')) |>
bind_rows(most_recent_flu) |>
arrange(location_name, date) %>%
filter(location_name != 'Virgin Islands') |>
group_by(location_name) |>
# Regularize each location to a weekly grid, spline-fill internal gaps
# only, and clamp negatives introduced by the spline at zero
complete(date = seq(min(date), max(date), by = "week")) |>
mutate(total_hosp = interpolate_internal(total_hosp, as.numeric(date))) |>
mutate(total_hosp = ifelse(total_hosp < 0, 0, total_hosp)) |>
ungroup()
## Joining with `by = join_by(date, location_name, ili, pred_hosp)`
# Sanity check: after completion every location should sit on a regular
# weekly grid, so consecutive dates differ by NA (first row) or 7 days.
sanity <- df.combined.hosp %>%
  group_by(location_name) %>%
  arrange(date, .by_group = TRUE) %>%
  mutate(diffs = difftime(date, lag(date)))
unique(sanity$diffs)
## [1] NA 7
# Full stitched series: points plus connecting line, one panel per location.
p5 <- df.combined.hosp %>%
  ggplot(aes(x = date, y = total_hosp)) +
  geom_point(size = 1.1, alpha = 0.3) +
  geom_line(alpha = 0.5) +
  facet_wrap(~location_name, ncol = 3, scales = 'free_y') +
  theme_bw()
show(p5)
## Warning: Removed 35464 rows containing missing values or values outside the
## scale range (`geom_point()`).
## Warning: Removed 679 rows containing missing values or values outside the scale
## range (`geom_line()`).

Output Stitched Time Series
# Persist the stitched time series; the commented line is an alternative
# output path kept for reference.
#write_csv(df.combined.hosp, file_names$output_final)
write_csv(df.combined.hosp, file_names$output_final_save)
Reload Stitched Series and Plot Recent Period
df.combined.hosp <- read_csv(file_names$output_final_save)
## Rows: 71815 Columns: 9
## ── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): location_name
## dbl (7): ili, pred_hosp, true_hosp, scale_factor, population, prelim_hosp, ...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Zoomed-in view of the stitched series, June 2023 onward.
p6 <- df.combined.hosp %>%
  filter(date >= mdy('06/01/2023')) %>%
  ggplot(aes(date, total_hosp)) +
  geom_point(size = 1.1, alpha = 0.3) +
  geom_line(alpha = 0.5) +
  facet_wrap(~location_name, ncol = 3, scales = 'free_y') +
  theme_bw()
show(p6)
